Analyzing the standard DataFrames


In [7]:
import sys
import logging
from pathlib import Path

# Send `gutils` log output to stderr and show everything down to DEBUG.
# Replacing the handler list (rather than appending) keeps re-running this
# cell from attaching duplicate handlers.
logger = logging.getLogger('gutils')
logger.handlers = [logging.StreamHandler()]
logger.setLevel(logging.DEBUG)

# Make the repository root importable so `gutils` resolves to this checkout.
repo_root = str(Path.cwd().parent.parent)
if repo_root not in sys.path:
    sys.path.append(repo_root)
sys.path


Out[7]:
['',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python36.zip',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python3.6',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python3.6/lib-dynload',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python3.6/site-packages',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python3.6/site-packages/cycler-0.10.0-py3.6.egg',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python3.6/site-packages/pytest_cache-1.0-py3.6.egg',
 '/data/conda/miniconda3-py36/envs/gutils36/lib/python3.6/site-packages/IPython/extensions',
 '/home/kwilcox/.ipython',
 '/data/dev/GUTILS']

In [8]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.dates as mpd
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook
%matplotlib inline

# Truncate long DataFrame displays to 12 rows
pd.set_option('display.max_rows', 12)
# Apply seaborn's default theme and slightly enlarge fonts for readability
sns.set()
sns.set_context(context="notebook", font_scale=1.1)

Load a DataFrame of standardized data


In [32]:
from pathlib import Path
from gutils.slocum import SlocumReader

# Slocum glider ASCII test data shipped with the GUTILS test suite,
# located relative to the notebook's working directory
ascii_folder = Path('.').absolute().parent.parent / 'gutils' / 'tests' / 'resources' / 'slocum' / 'bass-test-ascii' / 'rt' / 'ascii'
ascii_file = ascii_folder / 'usf_bass_2016_252_1_12_sbd.dat'
slocum_data = SlocumReader(str(ascii_file))
# NOTE(review): leftover hardcoded local path — keep only as an example input
#slocum_data = SlocumReader('/home/kwilcox/Downloads/test_data_gtuils/test_data/ascii2/otn200_2017_349_7_100_dbd.dat')
# Build the standardized DataFrame; later cells depend on `standard`.
# The trailing expression displays the time/position/pressure columns.
standard = slocum_data.standardize()
standard[[
    't',
    'y',
    'x',
    'pressure'
]]


Out[32]:
t y x pressure
0 2016-09-09 16:50:20.523319960 28.366750 -80.295853 0.0
1 2016-09-09 16:50:37.544100046 28.366750 -80.295853 NaN
2 2016-09-09 16:50:37.544100046 28.366750 -80.295853 NaN
3 2016-09-09 16:51:35.388819933 28.367303 -80.297225 NaN
4 2016-09-09 16:51:40.490419865 28.367351 -80.297344 NaN
5 2016-09-09 16:51:45.578459978 28.367398 -80.297462 NaN
... ... ... ... ...
716 2016-09-09 17:16:26.597440004 28.370852 -80.297158 NaN
717 2016-09-09 17:16:31.914520025 28.370852 -80.297158 NaN
718 2016-09-09 17:16:37.392879963 28.370852 -80.297158 NaN
719 2016-09-09 17:16:42.495819807 28.370852 -80.297158 NaN
720 2016-09-09 17:16:47.813419819 28.370852 -80.297158 NaN
721 2016-09-09 17:16:53.126919985 28.370852 -80.297158 NaN

722 rows × 4 columns

Assign profiles


In [38]:
from gutils.filters import default_filter

def profile_dataframe(default_df, tsint):
    """Assign profile ids to a standardized DataFrame, run it through the
    GUTILS default filter, display per-profile point counts, and plot the
    resulting depth/time series.

    `tsint` is the interpolation interval (seconds) passed through to
    `assign_profiles`.
    """
    with_profiles = assign_profiles(default_df, tsint=tsint)
    kept, _ = default_filter(with_profiles)
    display(kept.groupby('profile')[['x', 'y', 'z']].count())
    plot_profiles(kept, tsint)

    
def plot_profiles(df, tsint):
    """Plot glider depth ('z') versus time ('t'), colored by profile id.

    Parameters
    ----------
    df : pandas.DataFrame
        Filtered data containing at least 't', 'z' and 'profile' columns.
    tsint : int
        Interpolation interval used during profile assignment. Not used in
        the plot itself; kept for call-site symmetry with profile_dataframe.
    """
    plt.figure(figsize=(14, 8))
    sns.lineplot(
        data=df,
        y='z',
        x='t',
        hue='profile',
        palette="tab20",
        linewidth=2.5,
        legend='full',
        sort=False
    )
    # Depth increases downward, so flip the y-axis for an intuitive view
    plt.gca().invert_yaxis()
    # Fixed typo: label previously read 'Depth (m_' (unclosed unit paren)
    plt.ylabel('Depth (m)')
    plt.title('Glider depth vs time with {} unique profiles'.format(len(df.profile.unique())))
    plt.tight_layout()

In [39]:
from gutils.yo import assign_profiles


# from gutils import (
#     masked_epoch,
#     boxcar_smooth_dataset
# )

# def calculate_delta_depth(interp_data):
#     """ Figure out when the interpolated Z data turns a corner
#     """
#     delta_depth = np.diff(interp_data) 
#     delta_depth[delta_depth <= 0] = -1
#     delta_depth[delta_depth >= 0] = 1
#     delta_depth = boxcar_smooth_dataset(delta_depth, 2)
#     delta_depth[delta_depth <= 0] = -1
#     delta_depth[delta_depth >= 0] = 1
#     return delta_depth


# def assign_profiles(df, tsint=1):
#     profile_df = df.copy()
#     profile_df['profile'] = np.nan  # Fill profile with nans
#     tmp_df = df.copy()

#     if tsint is None:
#         tsint = 1

#     # Make 't' epochs and not a DateTimeIndex
#     tmp_df['t'] = masked_epoch(tmp_df.t)
#     # Set negative depth values to NaN
#     tmp_df.loc[tmp_df.z <= 0, 'z'] = np.nan

#     # Remove any rows where time or z is NaN
#     tmp_df = tmp_df.dropna(subset=['t', 'z'], how='any')

#     if len(tmp_df) < 2:
#         return None

#     # Create the fixed timestamp array from the min timestamp to the max timestamp
#     # spaced by tsint intervals
#     ts = np.arange(tmp_df.t.min(), tmp_df.t.max(), tsint)
#     # Stretch estimated values for interpolation to span entire dataset
#     interp_z = np.interp(
#         ts,
#         tmp_df.t,
#         tmp_df.z,
#         left=tmp_df.z.iloc[0],
#         right=tmp_df.z.iloc[-1]
#     )
    
#     del tmp_df

#     if len(interp_z) < 2:
#         return None

#     filtered_z = boxcar_smooth_dataset(interp_z, max(tsint // 2, 1))
#     delta_depth = calculate_delta_depth(filtered_z)

#     # Find where the depth indexes (-1 and 1) flip
#     inflections = np.where(np.diff(delta_depth) != 0)[0]
#     # Do we have any profiles?
#     if inflections.size < 1:
#         return profile_df

#     # Prepend a zero at the beginning start the series of profiles
#     p_inds = np.insert(inflections, 0, 0)
#     # Append the size of the time array to end the series of profiles
#     p_inds = np.append(p_inds, ts.size - 1)
#     # Zip up neighbors to get the ranges of each profile in interpolated space
#     p_inds = list(zip(p_inds[0:-1], p_inds[1:]))
#     # Convert the profile indexes into datetime objets
#     p_inds = [
#         (
#             pd.to_datetime(ts[int(p0)], unit='s'),
#             pd.to_datetime(ts[int(p1)], unit='s')
#         )
#         for p0, p1 in p_inds
#     ]
    
#     # We have the profiles in interpolated space, now associate this
#     # space with the actual data using the datetimes.
    
#     # Iterate through the profile start/stop indices
#     for profile_index, (min_time, max_time) in enumerate(p_inds):

#         # Get rows between the min and max time
#         time_between = profile_df.t.between(min_time, max_time, inclusive=True)
        
#         # Get indexes of the between rows since we can't assign by the range due to NaT values
#         ixs = profile_df.loc[time_between].index.tolist()

#         # Set the rows profile column to the profile id
#         if len(ixs) > 1:
#             profile_df.loc[ixs[0]:ixs[-1], 'profile'] = profile_index
#         elif len(ixs) == 1:
#             profile_df.loc[ixs[0], 'profile'] = profile_index
#         else:
#             L.debug('No data rows matched the time range of this profile, Skipping.')

#     # Remove rows that were not assigned a profile
#     # profile_df = profile_df.loc[~profile_df.profile.isnull()]

#     return profile_df

In [40]:
# Profile assignment with a 4-second interpolation interval
profile_dataframe(standard, 4)


x y z
profile
0 156 156 84
1 94 94 53
2 152 152 78
3 86 86 47
4 113 113 60
5 61 61 38

In [41]:
# Same data, 2-second interpolation interval, to compare profile boundaries
profile_dataframe(standard, 2)


x y z
profile
0 156 156 84
1 91 91 51
2 155 155 80
3 85 85 46
4 115 115 62
5 60 60 37

In [42]:
# Finest comparison: 1-second interpolation interval
profile_dataframe(standard, 1)


x y z
profile
0 156 156 84
1 91 91 51
2 156 156 80
3 84 84 46
4 115 115 62
5 60 60 37

In [ ]:


In [ ]: